Task 1

Get WD

getwd()
## [1] "/Users/tristanlarose/Desktop/R-Stuff/MATH4753TRISTAN2023/R/Lab9"

Task 2

Line A

Line A is taking sampling with replacement n*iter times. This means that every time something is sampled from the data, it is then replaced before the next sample is taken.

Line B

Line B is forming a confidence interval, so we can say with “confidence” that the mean of the values in the xstat vector is between \(\alpha/2\) and \(1-\alpha/2\).

Sample

It is important that the sample has equally likely members because the sample is assumed to be representative of the whole population. If there is a sampling bias, it cannot be assumed that the sample is represented.

set.seed(35) # This will give everyone the same sample
sam=round(rnorm(20,mean=10,sd=4),2)

unique(sample(sam,20,replace=TRUE) ) # repeat this line 5Xs
##  [1] 12.05 23.35 11.40 14.26  8.43 13.46  2.37  8.62 17.35 11.64 14.76  9.82
unique(sample(sam,20,replace=TRUE) ) # repeat this line 5Xs
##  [1]  8.43 14.26  7.75  2.89 12.05  2.37  9.82 13.46  6.93 10.53 23.35 11.78
## [13]  8.62 11.40  9.86
unique(sample(sam,20,replace=TRUE) ) # repeat this line 5Xs
##  [1]  9.86 10.53 14.26 17.35 12.05  6.93 23.35  8.62 11.64 16.69 11.40
unique(sample(sam,20,replace=TRUE) ) # repeat this line 5Xs
##  [1] 11.64  9.82  8.62 17.35 11.78 11.40  7.75  2.37 23.35  2.89
unique(sample(sam,20,replace=TRUE) ) # repeat this line 5Xs
##  [1]  7.75 14.76 16.69 17.35 14.26  8.62 11.40  8.43 12.05  2.89 10.53 23.35

I see that they will all have the same sample process from the repeated code, but they will return unique samples each time. They aren’t even all the same length.

#sample(sam,21,replace=FALSE)

When I run this code, I get the following message: Error in sample.int(length(x), size, replace, prob) : cannot take a sample larger than the population when ‘replace = FALSE’

This is because there were only 20 taken from the rnorm function called earler, so when we call for 21, it is larger than the population.

Task 3

myboot

myboot2<-function(iter=10000,x,fun="mean",alpha=0.05,cx=1.5,...){  #Notice where the ... is repeated in the code
n=length(x)   #sample size

y=sample(x,n*iter,replace=TRUE)
rs.mat=matrix(y,nr=n,nc=iter,byrow=TRUE)
xstat=apply(rs.mat,2,fun) # xstat is a vector and will have iter values in it 
ci=quantile(xstat,c(alpha/2,1-alpha/2))# Nice way to form a confidence interval
# A histogram follows
# The object para will contain the parameters used to make the histogram
para=hist(xstat,freq=FALSE,las=1,
main=paste("Histogram of Bootstrap sample statistics","\n","alpha=",alpha," iter=",iter,sep=""),
...)

#mat will be a matrix that contains the data, this is done so that I can use apply()
mat=matrix(x,nr=length(x),nc=1,byrow=TRUE)

#pte is the point estimate
#This uses whatever fun is
pte=apply(mat,2,fun)
abline(v=pte,lwd=3,col="Black")# Vertical line
segments(ci[1],0,ci[2],0,lwd=4)      #Make the segment for the ci
text(ci[1],0,paste("(",round(ci[1],2),sep=""),col="Red",cex=cx)
text(ci[2],0,paste(round(ci[2],2),")",sep=""),col="Red",cex=cx)

# plot the point estimate 1/2 way up the density
text(pte,max(para$density)/2,round(pte,2),cex=cx)

invisible(list(ci=ci,fun=fun,x=x))# Some output to use if necessary
}

A, B, C, and D \(\alpha=0.05, iter=10000\)

A

set.seed(39)
sam=rnorm(25,mean=25,sd=10)

myboot2(x=sam,fun="mean")

B

set.seed(30)
sam=rchisq(20,df=3)

myboot2(x=sam,fun="mean")

C

set.seed(40)
sam=rgamma(30,shape=2,scale=3)

myboot2(x=sam,fun="mean")

D

set.seed(10)
sam=rbeta(20,shape1=3,shape2=4)

myboot2(x=sam,fun="mean")

A, B, C, and D \(\alpha=0.20, iter=10000\)

A

set.seed(39)
sam=rnorm(25,mean=25,sd=10)

myboot2(alpha=0.20,x=sam,fun="sd")

B

set.seed(30)
sam=rchisq(20,df=3)

myboot2(alpha=0.20,x=sam,fun="sd")

C

set.seed(40)
sam=rgamma(30,shape=2,scale=3)

myboot2(alpha=0.20,x=sam,fun="sd")

D

set.seed(10)
sam=rbeta(20,shape1=3,shape2=4)

myboot2(alpha=0.20,x=sam,fun="sd")

Task 4

Modified myboot

myboot3<-function(iter=10000,x,fun="mean",alpha=0.05,cx=1.5,...){  #Notice where the ... is repeated in the code
n=length(x)   #sample size

y=sample(x,n*iter,replace=TRUE)
rs.mat=matrix(y,nr=n,nc=iter,byrow=TRUE)
xstat=apply(rs.mat,2,fun) # xstat is a vector and will have iter values in it 
ci=quantile(xstat,c(alpha/2,1-alpha/2))# Nice way to form a confidence interval
# A histogram follows
# The object para will contain the parameters used to make the histogram
para=hist(xstat,freq=FALSE,las=1,
main=paste("Histogram of Bootstrap sample statistics","\n","alpha=",alpha," iter=",iter,sep=""),
...)

#mat will be a matrix that contains the data, this is done so that I can use apply()
mat=matrix(x,nr=length(x),nc=1,byrow=TRUE)

#pte is the point estimate
#This uses whatever fun is
pte=apply(mat,2,fun)
abline(v=pte,lwd=3,col="Black")# Vertical line
segments(ci[1],0,ci[2],0,lwd=4)      #Make the segment for the ci
text(ci[1],0,paste("(",round(ci[1],2),sep=""),col="Red",cex=cx)
text(ci[2],0,paste(round(ci[2],2),")",sep=""),col="Red",cex=cx)

# plot the point estimate 1/2 way up the density
text(pte,max(para$density)/2,round(pte,2),cex=cx)

invisible(list(ci=ci,fun=fun,x=x))# Some output to use if necessary

returnxstat <- xstat[1:30]

barplot(returnxstat)

returnxstat
}
sam=c(1,1,1,2,2,2,2,3,3,3,4,4) 
myboot3(x=sam,fun="median")

##  [1] 2.0 2.5 2.5 2.5 2.0 3.0 2.0 2.0 2.0 2.0 2.0 3.0 2.0 2.0 2.0 3.0 3.0 3.0 3.0
## [20] 2.0 2.0 3.0 3.5 2.5 2.0 2.0 2.0 2.0 2.0 2.0

(L,U) for the median is (1.5,3)

Task 5

mean/median

mdm <-function(x,mu,med) {
  
  
  mu = mean(x)
  med = median(x)
  
  mu/med
}
set.seed(39)
sam=rnorm(25,mean=25,sd=10)

mdm(x=sam,mu=25,med=23)
## [1] 1.03183

\(\alpha=0.05\)

A

set.seed(39)
sam=rnorm(25,mean=25,sd=10)



myboot3(x=sam,fun="mdm")

##  [1] 1.0141717 0.9522362 1.0520663 1.0327802 1.0391721 1.0634375 0.9997171
##  [8] 1.0176378 1.0248454 1.0402058 0.9901158 1.0701368 1.0843793 1.1040092
## [15] 1.0165815 1.0276343 0.9883575 1.0361317 1.0249415 1.0024847 1.0456552
## [22] 1.0659379 1.0294717 0.9797110 1.0149925 1.0483109 1.0640558 1.0195963
## [29] 1.0334366 0.9869223

B

set.seed(30)
sam=rchisq(20,df=3)

myboot3(x=sam,fun="mdm")

##  [1] 1.0097414 1.0530067 1.2137731 1.1601584 1.4005105 1.1873797 1.3905221
##  [8] 1.3944185 1.3913864 1.3102591 0.8957889 0.8783956 0.9634798 1.0461584
## [15] 1.0551337 1.4325600 1.0871506 1.0256790 0.8924351 1.4209163 1.1258250
## [22] 1.4073845 1.0665017 1.0736469 1.3876285 1.2718384 1.1640663 1.2012353
## [29] 1.2653725 1.1616840

C

set.seed(40)
sam=rgamma(30,shape=2,scale=3)

myboot3(x=sam,fun="mdm")

##  [1] 0.9351502 1.0256593 1.0447382 1.1904808 0.9060347 0.9544553 1.0351612
##  [8] 0.9028318 1.1028838 1.0012774 0.8448455 0.9838157 0.9156785 0.9457528
## [15] 1.1526521 1.0128856 0.9301049 0.9243346 0.9998187 1.0164687 0.9533716
## [22] 0.9870776 1.0200035 0.9505142 0.9875215 0.9754569 0.9991673 1.0334298
## [29] 1.0305885 1.0622277

D

set.seed(10)
sam=rbeta(20,shape1=3,shape2=4)

myboot3(x=sam,fun="mdm")

##  [1] 1.0924968 1.0469605 0.9319899 1.0598286 1.0705060 1.0349484 1.0107068
##  [8] 1.0505284 0.9768548 1.0952915 1.0264776 0.9918289 1.0106375 1.0207451
## [15] 1.0824103 1.1080719 1.0436432 1.0985352 1.1835269 1.0420932 1.0469172
## [22] 0.9620113 1.0069586 0.9654113 0.9710161 1.1555332 1.0241533 1.0498256
## [29] 1.0860702 1.0020583

\(\alpha=0.3\)

A

set.seed(39)
sam=rnorm(25,mean=25,sd=10)



myboot3(alpha=0.3,x=sam,fun="mdm")

##  [1] 1.0141717 0.9522362 1.0520663 1.0327802 1.0391721 1.0634375 0.9997171
##  [8] 1.0176378 1.0248454 1.0402058 0.9901158 1.0701368 1.0843793 1.1040092
## [15] 1.0165815 1.0276343 0.9883575 1.0361317 1.0249415 1.0024847 1.0456552
## [22] 1.0659379 1.0294717 0.9797110 1.0149925 1.0483109 1.0640558 1.0195963
## [29] 1.0334366 0.9869223

B

set.seed(30)
sam=rchisq(20,df=3)

myboot3(alpha=0.3,x=sam,fun="mdm")

##  [1] 1.0097414 1.0530067 1.2137731 1.1601584 1.4005105 1.1873797 1.3905221
##  [8] 1.3944185 1.3913864 1.3102591 0.8957889 0.8783956 0.9634798 1.0461584
## [15] 1.0551337 1.4325600 1.0871506 1.0256790 0.8924351 1.4209163 1.1258250
## [22] 1.4073845 1.0665017 1.0736469 1.3876285 1.2718384 1.1640663 1.2012353
## [29] 1.2653725 1.1616840

C

set.seed(40)
sam=rgamma(30,shape=2,scale=3)

myboot3(alpha=0.3,x=sam,fun="mdm")

##  [1] 0.9351502 1.0256593 1.0447382 1.1904808 0.9060347 0.9544553 1.0351612
##  [8] 0.9028318 1.1028838 1.0012774 0.8448455 0.9838157 0.9156785 0.9457528
## [15] 1.1526521 1.0128856 0.9301049 0.9243346 0.9998187 1.0164687 0.9533716
## [22] 0.9870776 1.0200035 0.9505142 0.9875215 0.9754569 0.9991673 1.0334298
## [29] 1.0305885 1.0622277

D

set.seed(10)
sam=rbeta(20,shape1=3,shape2=4)

myboot3(alpha=0.3,x=sam,fun="mdm")

##  [1] 1.0924968 1.0469605 0.9319899 1.0598286 1.0705060 1.0349484 1.0107068
##  [8] 1.0505284 0.9768548 1.0952915 1.0264776 0.9918289 1.0106375 1.0207451
## [15] 1.0824103 1.1080719 1.0436432 1.0985352 1.1835269 1.0420932 1.0469172
## [22] 0.9620113 1.0069586 0.9654113 0.9710161 1.1555332 1.0241533 1.0498256
## [29] 1.0860702 1.0020583

Task 6

new distributions

for mean

sam=rpois(20,lambda = 2)

myboot3(alpha=0.2,x=sam,fun="mean")

##  [1] 2.05 1.90 2.30 2.10 2.05 2.10 1.35 2.20 1.95 2.45 2.65 2.35 2.35 2.30 2.25
## [16] 2.35 2.15 1.80 2.05 2.00 1.90 2.30 2.10 2.10 2.15 1.75 2.05 2.20 2.30 2.25
sam=rt(20,1,1)

myboot3(alpha=0.2,x=sam,fun="mean")

##  [1]  1.766918 18.358477  9.119241 13.737628  8.210548  2.246146  2.008391
##  [8] 16.025656 12.807642  6.898917  5.315956  4.467021 16.362953  3.461330
## [15] 14.748711  6.808148  8.760255 13.741515  3.770067  9.230105  1.472186
## [22] 13.450949 13.906526 29.687999  9.188541  2.953155 22.153251  6.374731
## [29] 10.773822  9.424799
sam=runif(20,min=0,max=1)

myboot3(alpha=0.2,x=sam,fun="mean")

##  [1] 0.4695313 0.5150180 0.4849274 0.3698499 0.4398388 0.5374734 0.4491306
##  [8] 0.5189559 0.4628478 0.4385996 0.4570050 0.4217477 0.4785623 0.5577056
## [15] 0.3416400 0.3953368 0.3748446 0.4830833 0.3648258 0.5413605 0.4113897
## [22] 0.4865153 0.4341627 0.4631382 0.4335480 0.5612512 0.5648091 0.5087039
## [29] 0.4237169 0.4944229
sam=rweibull(20,shape=2,scale=1)

myboot3(alpha=0.2,x=sam,fun="mean")

##  [1] 0.9422220 0.9481268 0.8929954 1.0835943 0.9296055 1.1421578 0.5953011
##  [8] 0.8190905 0.7896739 0.9429592 0.7367795 1.0065662 1.0113487 0.8041763
## [15] 1.0452574 0.8022423 0.9103877 0.8721840 0.9431931 0.8101505 0.8675585
## [22] 0.8982946 1.1038430 0.8899226 1.0908838 0.8416496 0.6974515 0.8816464
## [29] 0.8567043 0.8787321

for variance

sam=rpois(20,lambda = 2)

myboot3(alpha=0.2,x=sam,fun="var")

##  [1] 7.039474 6.450000 3.818421 4.842105 6.576316 5.000000 2.365789 4.115789
##  [9] 1.315789 5.628947 3.628947 4.976316 3.989474 6.210526 4.765789 5.502632
## [17] 5.565789 2.365789 3.628947 2.894737 1.852632 5.923684 4.589474 7.460526
## [25] 2.578947 3.944737 5.852632 7.515789 8.263158 3.039474
sam=rt(20,1,1)

myboot3(alpha=0.2,x=sam,fun="var")

##  [1] 1308.3186795    0.1892129    2.1357237    1.7970719    0.1223572
##  [6] 2480.1417952 1314.4442738 1309.3271554 1312.1353210 1312.2036481
## [11] 1310.3309113 1313.1235886 1311.1127716    0.2232569 3505.0551281
## [16]    3.9987247    0.2784841 2486.2229743    0.3004680    3.8472672
## [21] 1304.9338589    0.3695109 1313.3997511    2.0560489    2.2640034
## [26] 1305.3693876 1311.6433806    0.3865465    5.1826179 1301.2307824
sam=runif(20,min=0,max=1)

myboot3(alpha=0.2,x=sam,fun="var")

##  [1] 0.03826351 0.06704864 0.08882753 0.09162590 0.04655119 0.07099621
##  [7] 0.08399024 0.12058729 0.09950229 0.06560584 0.06180785 0.05786724
## [13] 0.06810638 0.06925508 0.08719264 0.08755745 0.10242883 0.10980806
## [19] 0.07370101 0.04997499 0.06594581 0.08259963 0.09726349 0.09336185
## [25] 0.08936755 0.07086981 0.08173503 0.09991886 0.09140170 0.07099633
sam=rweibull(20,shape=2,scale=1)

myboot3(alpha=0.2,x=sam,fun="var")

##  [1] 0.1312020 0.1894797 0.2476022 0.3123286 0.2205885 0.2342553 0.1713307
##  [8] 0.2557151 0.2043302 0.1955178 0.1445262 0.2082293 0.1519552 0.2976476
## [15] 0.2173270 0.1003913 0.2379961 0.1498003 0.2171362 0.1895045 0.2613973
## [22] 0.2091364 0.2605430 0.1656416 0.3231527 0.2079619 0.2584672 0.1883284
## [29] 0.1140527 0.2096883

Task 7

2 interesting statisctics

set.seed(68)
sam=rnorm(20,mean=10,sd=4) 

myboot3(x=sam,fun="IQR")

##  [1] 4.200316 5.270480 3.897552 4.778771 4.013224 7.400130 4.450257 3.484329
##  [9] 5.832866 5.063257 4.330650 5.495033 5.885148 4.450257 6.114798 6.167885
## [17] 3.863094 4.995039 3.611357 3.188615 3.647779 3.496190 2.292009 3.815194
## [25] 4.061801 6.562145 5.552263 3.689134 4.144333 5.267821
set.seed(68)
sam=rnorm(20,mean=10,sd=4) 

myboot3(x=sam,fun="quantile")

##  [1]  6.716014  9.458889 10.896016 13.659205 15.743750  1.155378  9.347052
##  [8] 12.489596 14.617532 15.743750  1.155378  8.055388  9.801210 11.952940
## [15] 13.997038  6.716014  8.880434 12.816496 13.659205 15.743750  1.155378
## [22]  9.484053 10.143532 13.497278 15.743750  1.155378  5.325855  9.550223
## [29] 12.725985 15.743750

checking with myboot

set.seed(68)
sam=rnorm(20,mean=10,sd=4)

myboot3(x=sam)

##  [1] 11.405468 11.109820  9.592962 11.565877 10.892483  8.614390 10.182968
##  [8]  9.578886 11.089884  9.749299 11.248788 10.682887 11.400573 10.963674
## [15] 11.110339 10.918552  9.745078  9.521175 11.474818 10.486004  9.660582
## [22] 10.787800  9.138049 10.868586 11.880370 10.862518 10.809010 10.403517
## [29] 10.565306 11.220497

Task 8

ddt<-read.csv("DDT.csv")
MATH4753TRISTAN2023::myboot(x=ddt$DDT)

##  [1] 24.67653 16.41625 22.04153 23.05035 24.87833 29.43125 18.58979 29.23403
##  [9] 22.76632 33.40972 22.26194 33.65479 20.38958 28.97993 24.44479 31.11208
## [17] 29.47812 20.49479 14.50069 15.65132 26.87764 25.89486 31.90097 24.01243
## [25] 26.29097 28.48236 23.44667 37.20924 19.92639 10.91188